Controls Analysis after Batch correction

1 Libraries & custom functions

knitr::opts_chunk$set(
  cache = FALSE,
  concordance = TRUE,
  prompt = TRUE, # fig.width=5, fig.height=5,
  out.width = "100%",
  echo = TRUE,
  warning = FALSE,
  message = FALSE,
  error = TRUE,
  tidy = FALSE,
  comment = "")
> # from Bioconductor (install by running: BiocManager::install("x") | x = name of the package)
> # BiocManager::install(c("DESeq2", "phyloseq", "microbiome"))
> 
> #github functions
> # devtools::install_github("bryandmartin/corncob")
> # remotes::install_github("g-antonello/gautils")
> # remotes::install_github("mikemc/speedyseq")#
> # devtools::install_github("gaospecial/ggvenn")
> 
> library(gautils)
> library(magrittr)
> library(ggpubr)
> library(kableExtra)
> 
> # Load control samples data
> # not working yet, need to transfer from local to server
> 
> asvtable <- read_tsv("/shared/statgen/microbiome/CHRISMB/controls/controls_asv_table.tsv") %>% 
+   column_to_rownames("ASV")
> taxtab <- read_tsv("/shared/statgen/microbiome/CHRISMB/controls/controls_tax_table.tsv")  %>% 
+   set_rownames(as.character(.$ASV))
> metadata <- read_tsv("/shared/statgen/microbiome/CHRISMB/controls/controls_meta_table.tsv")
> 
> refseq <- Biostrings::readDNAStringSet(filepath = "/shared/statgen/microbiome/CHRISMB/controls/controls_refseq.phy")
> 
> # check identity of rownames and colnames. for the ASVs, the reduce/Reduce strategy did not work
> samples_order_identical <- all(purrr::reduce(list(colnames(asvtable), metadata$Sample_ID), identical))
> ASVs_order_identical <- all(sapply(1:nrow(expand.grid(c(1,2,3), c(1,2,3))), function(i) identical(list(rownames(asvtable), rownames(taxtab), names(refseq))[[expand.grid(c(1,2,3), c(1,2,3))[i,1]]],list(rownames(asvtable), rownames(taxtab), names(refseq))[[expand.grid(c(1,2,3), c(1,2,3))[i,2]]])) )
> 
> if(samples_order_identical & ASVs_order_identical){
+   phy_only_ctrls <- phyloseq(otu_table(asvtable, taxa_are_rows = T), tax_table(as.matrix(taxtab)), sample_data(metadata %>% set_rownames(NULL)), refseq(refseq))  
+ } else{
+   stop("Rownames or colnames of phyloseq component tables do not match")
+ }

2 Beta diversity

> # all 
> theme_set(theme_light())
> phy_only_ctrls_nonZero <- subset_samples(phy_only_ctrls, counts_per_sample > 0)
> phy_only_ctrls_nonZero <- subset_taxa(phy_only_ctrls_nonZero, taxa_sums(phy_only_ctrls_nonZero) > 0)
> 
> ord <- ordinate(microbiome::transform(phy_only_ctrls_nonZero, "compositional"), method = "PCoA", distance = "bray")
> 
> basic_plot <- plot_ordination(physeq = phy_only_ctrls_nonZero,
+                 ordination = ord,
+                 color = "ctrl_type"
+                 )
> 
> pcoa_ctrls_final_all <- basic_plot + ggforce::geom_mark_ellipse(aes(group= ctrl_type, colour = ctrl_type, label = ctrl_type)) + ggtitle("All controls")
> 
> # only zymo
> phy_only_ctrls_nonZero <- subset_samples(phy_only_ctrls, counts_per_sample > 0 & ctrl_type == "zymo_pos")
> phy_only_ctrls_nonZero <- subset_taxa(phy_only_ctrls_nonZero, taxa_sums(phy_only_ctrls_nonZero) > 0)
> 
> ord <- ordinate(microbiome::transform(phy_only_ctrls_nonZero, "compositional"), method = "PCoA", distance = "bray")
> 
> basic_plot <- plot_ordination(physeq = phy_only_ctrls_nonZero,
+                 ordination = ord,
+                 color = "batch"
+                 )
> 
> pcoa_ctrls_final_zymo <- basic_plot + ggforce::geom_mark_ellipse(aes(group= batch, colour = batch, label = batch)) + ggtitle("Zymo")
> 
> # only extr ctrl
> 
> phy_only_ctrls_nonZero <- subset_samples(phy_only_ctrls, counts_per_sample > 0 & ctrl_type == "DNA_extraction_ctrl")
> phy_only_ctrls_nonZero <- subset_taxa(phy_only_ctrls_nonZero, taxa_sums(phy_only_ctrls_nonZero) > 0)
> 
> ord <- ordinate(microbiome::transform(phy_only_ctrls_nonZero, "compositional"), method = "PCoA", distance = "bray")
> 
> basic_plot <- plot_ordination(physeq = phy_only_ctrls_nonZero,
+                 ordination = ord,
+                 color = "batch"
+                 )
> 
> pcoa_ctrls_final_DNA_extr <- basic_plot + ggforce::geom_mark_ellipse(aes(group= batch, colour = batch, label = batch)) + ggtitle("DNA Extraction controls")
> 
> # only water ctrl
> 
> 
> phy_only_ctrls_nonZero <- subset_samples(phy_only_ctrls, counts_per_sample > 0 & ctrl_type == "water_ctrl")
> phy_only_ctrls_nonZero <- subset_taxa(phy_only_ctrls_nonZero, taxa_sums(phy_only_ctrls_nonZero) > 0)
> 
> ord <- ordinate(microbiome::transform(phy_only_ctrls_nonZero, "compositional"), method = "PCoA", distance = "bray")
> 
> basic_plot <- plot_ordination(physeq = phy_only_ctrls_nonZero,
+                 ordination = ord,
+                 color = "batch"
+                 )
> 
> pcoa_ctrls_final_water <- basic_plot + ggforce::geom_mark_ellipse(aes(group= batch, colour = batch, label = batch)) + ggtitle("Water controls")
> 
> 
> ggsave(filename = "pcoa_bray_controls.png", height = 8, width = 10, dpi = 300, 
+        plot = ggpubr::ggarrange(pcoa_ctrls_final_all, ggpubr::ggarrange(pcoa_ctrls_final_zymo,pcoa_ctrls_final_DNA_extr,pcoa_ctrls_final_water, nrow = 1, ncol = 3, common.legend = TRUE, legend = "top"), nrow = 2, ncol = 1, heights = c(0.8,1))
+        )
> 
> ggpubr::ggarrange(pcoa_ctrls_final_all, ggpubr::ggarrange(pcoa_ctrls_final_zymo,pcoa_ctrls_final_DNA_extr,pcoa_ctrls_final_water, nrow = 1, ncol = 3, common.legend = TRUE, legend = "top"), nrow = 2, ncol = 1, heights = c(0.8,1))

3 Zymo controls

3.1 Heatmap by ASV

3.1.1 All batches

> phy_ctrls_zymo <- subset_samples(phy_only_ctrls, ctrl_type == "zymo_pos")
> phy_ctrls_zymo <- subset_taxa(phy_ctrls_zymo, taxa_sums(phy_ctrls_zymo)/(sum(taxa_sums(phy_ctrls_zymo)))*100 > 0.1)
> 
> 
> 
> plot_all_batches <- phy_ctrls_zymo %>% 
+   abundances("compositional") %>% 
+   pheatmap::pheatmap(scale = "none", annotation_col = select(meta(phy_ctrls_zymo), batch))

3.1.2 CHRISMB-only batches

> phy_ctrls_zymo_chrismb <- filter_sample_data(phy_ctrls_zymo, batch %in% as.character(c(1:5, 7)))
> phy_ctrls_zymo_chrismb <- subset_taxa(phy_ctrls_zymo_chrismb, taxa_sums(phy_ctrls_zymo_chrismb)>0)
> 
> plot_chrismb_batches <- phy_ctrls_zymo_chrismb %>% 
+   abundances("compositional") %>% 
+   pheatmap::pheatmap(scale = "none", annotation_col = select(meta(phy_ctrls_zymo), batch))

Batch 4 has enough clustering to deserve some attention, let’s keep it in mind

3.1.3 PCoA

> plot_ordination(phy_ctrls_zymo_chrismb, 
+                 ordination,type = "samples", 
+                 color = "batch")+
+   #scale_color_brewer(type = "div")+
+   scale_color_viridis_d()+
+   stat_ellipse(aes(group = batch))+ 
+   geom_point(size = 3)

##{-}

3.2 barplot

> phy_ctrls_zymo_relabund <- microbiome::transform(phy_ctrls_zymo, "compositional")
> sample_names(phy_ctrls_zymo_relabund) <- paste0("plate",phy_ctrls_zymo_relabund@sam_data$plate, "_batch", phy_ctrls_zymo_relabund@sam_data$batch)
> 
> plotly::ggplotly(plot_bar(phy_ctrls_zymo_relabund, fill = "ASV")+ scale_fill_viridis_d()+scale_color_viridis_d() + ggtitle("Filtered for ASVs > 0.1%"))

3.3 how much variability does batch explain?

Plate explains 60% of the variability, sequencing 30%. Nicely, this is expected, because sequencing should be more accurate than the previous steps necessary to prepare the library.

> zymo_dist_bray <- phyloseq::distance(phy_ctrls_zymo_relabund, method = "bray")
> library(vegan)
> betadisper(zymo_dist_bray, group = phy_ctrls_zymo_relabund@sam_data$batch) %>% permutest()

Permutation test for homogeneity of multivariate dispersions
Permutation: free
Number of permutations: 999

Response: Distances
          Df    Sum Sq    Mean Sq      F N.Perm Pr(>F)
Groups     9 0.0016822 0.00018691 1.0264    999   0.42
Residuals 20 0.0036421 0.00018210                     
> zymo_permanova <- vegan::adonis2(formula = zymo_dist_bray ~ plate + batch, data = microbiome::meta(phy_ctrls_zymo_relabund))
> 
> kableExtra::kbl(zymo_permanova, digits = 2) %>% 
+   kableExtra::kable_styling()
Df SumOfSqs R2 F Pr(>F)
plate 4 0.03 0.61 30.20 0
batch 8 0.01 0.30 7.52 0
Residual 17 0.00 0.09 NA NA
Total 29 0.04 1.00 NA NA